rm(list=ls()) 
setwd("")#input working directory
options(scipen=999) 

install.packages('ggplot2', repos = "http://cran.us.r-project.org")
install.packages("ggsignif")
install.packages('reshape2', repos = "http://cran.us.r-project.org")
install.packages("gridExtra")
install.packages("stargazer")
install.packages('sandwich', repos = "http://cran.us.r-project.org")
install.packages('lmtest', repos = "http://cran.us.r-project.org")
install.packages('data.table')
install.packages('pysch')
install.packages("papeR")
install.packages("dotwhisker")

library(ggplot2);library(plyr);library(dplyr);library(ggsignif);library(reshape2);library(reshape2)
library(gridExtra);library(sandwich);library(lmtest);library(data.table);library(stargazer);library(psych)
library(broom);library(purrr)
library(Hmisc);library(ggpubr);library(papeR)
library(broom);library(purrr);library(papeR);library(readr);library(REdaS);library(freqtables)
library(translateR);library(readr);library(ggplot2);library(stringr);library(plyr);
library(splitstackshape);library(data.table);library(dplyr);library(cowplot);library(Jmisc);library(psych);library(qwraps2);library(xtable);library(sandwich);library(stargazer);library(lmtest);library(ggpubr);library(multiwayvcov)
library(wordcloud);library(stm);library(readxl)
library(broom);library(purrr);library(ggpubr);library(papeR);library(readr);library(REdaS);library(freqtables);library(fastDummies)#fretables is better than redas

#Load data
data <- read.csv("1_Police_Marathi_qualtrics.csv")
data2 <- read.csv("1_Police_Marathi_Numeric_qualtrics.csv")

##Fix glitch with ftrust2 (NA's refer to 1's for this button)
#dummy for policewoman video
data2$female_officer <- ifelse(data2$condition==1|data2$condition==3|data2$condition==5|data2$condition==7, 1, 0)
data2$ftrust2_new <- ifelse(data2$female_officer == 1 & is.na(data2$ftrust2), 1, data2$ftrust2) #changing NA to 1 in ftrust2, IFF female_officer=1 (i.e. they saw a policewoman video)
table(data2$ftrust2_new)
select(data2, ftrust2, ftrust2_new)
table(data2$ftrust2)
table(data2$ftrust2_new)

##Coalescing all the male/female separate answers 
data2 <- data2 %>% mutate(trust = coalesce(ftrust1,mtrust1)) 
data2 <- data2 %>% mutate(trust2 = coalesce(ftrust2_new,mtrust2)) 
data2 <- data2 %>% mutate(corrupt = coalesce(fcorrupt,mcorrupt)) 
data2 <- data2 %>% mutate(torture = coalesce(ftorture,mtorture)) 
data2 <- data2 %>% mutate(abuse = coalesce(fabuse,mabuse)) 
data2 <- data2 %>% mutate(bias = coalesce(fbias,mbias)) 
data2 <- data2 %>% mutate(comp = coalesce(fcomp,mcomp)) 
data2 <- data2 %>% mutate(lawful = coalesce(flawful,mlawful)) 
data2 <- data2 %>% mutate(compromise = coalesce(fcompromise,mcompromise)) 
data2 <- data2 %>% mutate(catch = coalesce(fcatch,mcatch)) 
data2 <- data2 %>% mutate(fear = coalesce(ffear,mfear)) 
data2 <- data2 %>% mutate(prompt = coalesce(fprompt,mprompt)) 
data2 <- data2 %>% mutate(interrogate = coalesce(finterrogate,minterrogate)) 
data2 <- data2 %>% mutate(time = coalesce(ftime,mtime))

#creating additive scales
data2 <- mutate(data2, trustworthy = rowSums(cbind(trust,trust2), na.rm = T))
data2 <- mutate(data2, fair = rowSums(cbind(corrupt, abuse, torture, bias, comp, lawful), na.rm = T))
data2 <- mutate(data2, effective = rowSums(cbind(compromise, catch, fear, prompt, interrogate, time), na.rm = T))
data2 <- mutate(data2, legit = rowSums(cbind(trustworthy,fair,effective), na.rm = T))##Overall legitimacy

#Averages
data2$trustworthy3 <- (data2$trustworthy)/2
data2$fair3 <- (data2$fair)/6
data2$effective3 <- (data2$effective)/6
data2$legit3 <- (data2$legit)/14

##
psych::alpha(data2[,c("trust","trust2","corrupt", "abuse", "torture", "bias", "comp", "lawful","compromise", "catch", "fear", "prompt", "interrogate", "time")])

###Descriptive statistics (mean, minimum, maximum, and sd) for dataset
#Table A15
results <- rbind(apply(!is.na(data2[,c('trust','trust2', 'trustworthy', 'corrupt','torture','abuse','bias','comp','lawful','fair3', 'compromise','catch','fear','prompt','interrogate','time','effective3','legit3')]),2,sum),
                 apply(data2[,c('trust','trust2','trustworthy3', 'corrupt','torture','abuse','bias','comp','lawful','fair3', 'compromise','catch','fear','prompt','interrogate','time','effective3','legit3')],2,mean,na.rm=T),	
                 apply(data2[,c('trust','trust2', 'trustworthy3', 'corrupt','torture','abuse','bias','comp','lawful','fair3', 'compromise','catch','fear','prompt','interrogate','time','effective3','legit3')],2,var,na.rm=T),	
                 apply(data2[,c('trust','trust2','trustworthy3', 'corrupt','torture','abuse','bias','comp','lawful','fair3', 'compromise','catch','fear','prompt','interrogate','time','effective3','legit3')],2,sd,na.rm=T),	
                 apply(data2[,c('trust','trust2','trustworthy3', 'corrupt','torture','abuse','bias','comp','lawful','fair3', 'compromise','catch','fear','prompt','interrogate','time','effective3','legit3')],2,min,na.rm=T),	
                 apply(data2[,c('trust','trust2','trustworthy3', 'corrupt','torture','abuse','bias','comp','lawful','fair3', 'compromise','catch','fear','prompt','interrogate','time','effective3','legit3')],2,max,na.rm=T),	
                 apply(data2[,c('trust','trust2','trustworthy3', 'corrupt','torture','abuse','bias','comp','lawful','fair3', 'compromise','catch','fear','prompt','interrogate','time','effective3','legit3')],2,sum,na.rm=T))			
rownames(results) <- c("count", "mean", "var", "sd", "min", "max", "sum")
t(results)
t(results[1:6,])
stargazer(results[1:6,], summary=FALSE, rownames=TRUE, flip=TRUE, digits = 2)

##Exploratory tests
wr <- subset(data2, data2$condition==1)#policewoman-rape
mr <- subset(data2, data2$condition==2)#policeman-rape
wk <- subset(data2, data2$condition==3)#policewoman-kidnapping
mk <- subset(data2, data2$condition==4)#policeman-kidnapping
wm <- subset(data2, data2$condition==5)#policewoman-murder
mm <- subset(data2, data2$condition==6)#policeman-murder
wd <- subset(data2, data2$condition==7)#policewoman-dowry
md <- subset(data2, data2$condition==8)#policeman-dowry

#kidnapping
t1 <- t.test(wk$trustworthy, mk$trustworthy)
t2 <- t.test(wk$fair, mk$fair)
t3 <- t.test(wk$effective, mk$effective)
#murder
t4 <- t.test(wm$trustworthy, mm$trustworthy)
t5 <- t.test(wm$fair, mm$fair)
t6 <- t.test(wm$effective, mm$effective)
#dowry
t7 <- t.test(wd$trustworthy, md$trustworthy)
t8 <- t.test(wd$fair, md$fair)
t9 <- t.test(wd$effective, md$effective)
#rape
t10 <- t.test(wr$trustworthy, mr$trustworthy)
t11 <- t.test(wr$fair, mr$fair)
t12 <- t.test(wr$effective, mr$effective)
tab1 <- map_df(list(t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12), tidy)
tab1
setnames(tab1, old=c("estimate1","estimate2", "estimate"), new=c("female (t)", "male (c)", "diff"))
tab1
tab1$sig <- ""
tab1$sig[tab1$p.value <= .1]  <- "*"
tab1$sig[tab1$p.value <= .05]  <- "**"
tab1$sig[tab1$p.value <= .001] <- "***"
tab1
tab1$category <- c('kidnapping_trustworthy', 'kidnapping_fair', 'kidnapping_effective', 'murder_trustworthy', 'murder_fair', 'murder_effective', 
                   'dowry_trustworthy', 'dowry_fair', 'dowry_effective','rape_trustworthy', 'rape_fair', 'rape_effective')
tab1 <- tab1 %>%
  select('male (c)', 'female (t)', diff, conf.low, conf.high, p.value, category, sig)
tab1

##
#kidnapping
w1 <- wilcox.test(wk$trustworthy, mk$trustworthy,alternative="two.sided",exact=TRUE,conf.int=TRUE)
w2 <- wilcox.test(wk$fair, mk$fair,alternative="two.sided",exact=TRUE,conf.int=TRUE)
w3 <- wilcox.test(wk$effective, mk$effective,alternative="two.sided",exact=TRUE,conf.int=TRUE)
#murder
w4 <- wilcox.test(wm$trustworthy, mm$trustworthy,alternative="two.sided",exact=TRUE,conf.int=TRUE)
w5 <- wilcox.test(wm$fair, mm$fair,alternative="two.sided",exact=TRUE,conf.int=TRUE)
w6 <- wilcox.test(wm$effective, mm$effective,alternative="two.sided",exact=TRUE,conf.int=TRUE)
#dowry
w7 <- wilcox.test(wd$trustworthy, md$trustworthy,alternative="two.sided",exact=TRUE,conf.int=TRUE)
w8 <- wilcox.test(wd$fair, md$fair,alternative="two.sided",exact=TRUE,conf.int=TRUE)
w9 <- wilcox.test(wd$effective, md$effective,alternative="two.sided",exact=TRUE,conf.int=TRUE)
#rape
w10 <- wilcox.test(wr$trustworthy, mr$trustworthy,alternative="two.sided",exact=TRUE,conf.int=TRUE)
w11 <- wilcox.test(wr$fair, mr$fair,alternative="two.sided",exact=TRUE,conf.int=TRUE)
w12 <- wilcox.test(wr$effective, mr$effective,alternative="two.sided",exact=TRUE,conf.int=TRUE)
wilcox_tab <- map_df(list(w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12), tidy)
wilcox_tab
wilcox_tab$star <- ""
wilcox_tab$star[wilcox_tab$p.value <= .1]  <- "*"
wilcox_tab$star[wilcox_tab$p.value <= .05]  <- "**"
wilcox_tab$star[wilcox_tab$p.value <= .001] <- "***"
wilcox_tab

##Manipulations of crime category and officer gender
data2$female_officer <- ifelse(data2$condition==1|data2$condition==3|data2$condition==5|data2$condition==7, 1, 0)
data2$female_officer <- as.factor(data2$female_officer)

data2$crime <- ifelse(data2$condition==1|data2$condition==2, "rape", 
                      ifelse(data2$condition==3|data2$condition==4, "kidnapping", 
                             ifelse(data2$condition==5|data2$condition==6, "murder", 
                                    "dowry")))

data2$gendered <- ifelse(data2$crime=="dowry"|data2$crime=="rape", 1, 0)
data2$gendered <- as.factor(data2$gendered)

##Pooled crime 
data2$category <- "newvar"
data2$category <- ifelse(data2$female_officer==1 & data2$gendered==1, "Policewoman_VAW", data2$category)
data2$category <- ifelse(data2$female_officer==0 & data2$gendered==1, "Policeman_VAW", data2$category)
data2$category <- ifelse(data2$female_officer==1 & data2$gendered==0, "Policewoman_NonVAW", data2$category)
data2$category <- ifelse(data2$female_officer==0 & data2$gendered==0, "Policeman_NonVAW", data2$category)
table(data2$category)

policewoman_gbv <- subset(data2, data2$category=="Policewoman_VAW")
policewoman_nongbv <- subset(data2, data2$category=="Policewoman_NonVAW")
policeman_gbv <- subset(data2, data2$category=="Policeman_VAW")
policeman_nongbv <- subset(data2, data2$category=="Policeman_NonVAW")

##
a1 <- t.test(policewoman_gbv$legit3, policeman_gbv$legit3)
a2 <- t.test(policewoman_nongbv$legit3, policeman_nongbv$legit3)
a3 <- t.test(policewoman_gbv$legit3, policewoman_nongbv$legit3)
a4 <- t.test(policeman_gbv$legit3, policeman_nongbv$legit3)
tab1 <- map_df(list(a1, a2, a3, a4), tidy)
tab1$sig <- ""
tab1$sig[tab1$p.value <= .1]  <- "*"
tab1$sig[tab1$p.value <= .05]  <- "**"
tab1$sig[tab1$p.value <= .001] <- "***"
tab1
tab1$eval <- c("A-B","C-D","A-C","B-D")
tab1$eval <- factor(tab1$eval, levels = tab1$eval)

##Show all respondents
plot1 <- ggplot(tab1,aes(x=reorder(eval, desc(eval)), y=estimate,ymin=conf.low, ymax=conf.high)) + 
  geom_pointrange() + coord_flip() + ggtitle("All Respondents") + xlab("") + 
  ylab("") + geom_hline(yintercept=0, linetype="dashed") + ylim(-0.3, 0.3)
plot1 <- plot1 + theme(axis.text.x = element_text(colour="grey20", size=16, hjust=.5, vjust=.5),
                       axis.text.y = element_text(colour="grey20", size=16)) + theme(plot.title = element_text(size = 16, hjust=0.5)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))  

plot1 

##
s1 <- wilcox.test(policewoman_gbv$legit3, policeman_gbv$legit3,alternative="two.sided",exact=TRUE,conf.int=TRUE)
s2 <- wilcox.test(policewoman_nongbv$legit3, policeman_nongbv$legit3,alternative="two.sided",exact=TRUE,conf.int=TRUE)
s3 <- wilcox.test(policewoman_gbv$legit3, policewoman_nongbv$legit3,alternative="two.sided",exact=TRUE,conf.int=TRUE)
s4 <- wilcox.test(policeman_gbv$legit3, policeman_nongbv$legit3,alternative="two.sided",exact=TRUE,conf.int=TRUE)

wilcox_tab <- map_df(list(s1, s2, s3, s4), tidy)
wilcox_tab
wilcox_tab$star <- ""
wilcox_tab$star[wilcox_tab$p.value <= .1]  <- "*"
wilcox_tab$star[wilcox_tab$p.value <= .05]  <- "**"
wilcox_tab$star[wilcox_tab$p.value <= .001] <- "***"
wilcox_tab
wilcox_tab$eval <- c("A-B","C-D","A-C","B-D")
wilcox_tab$eval <- factor(wilcox_tab$eval, levels = wilcox_tab$eval)
wilcox_tab

##Female Respondents
women <- subset(data2, data2$gender==2)
xpolicewoman_gbv <- subset(women, women$category=="Policewoman_VAW")
xpolicewoman_nongbv <- subset(women, women$category=="Policewoman_NonVAW")
xpoliceman_gbv <- subset(women, women$category=="Policeman_VAW")
xpoliceman_nongbv <- subset(women, women$category=="Policeman_NonVAW")

##
b1 <- t.test(xpolicewoman_gbv$legit3, xpoliceman_gbv$legit3)
b2 <- t.test(xpolicewoman_nongbv$legit3, xpoliceman_nongbv$legit3)
b3 <- t.test(xpolicewoman_gbv$legit3, xpolicewoman_nongbv$legit3)
b4 <- t.test(xpoliceman_gbv$legit3, xpoliceman_nongbv$legit3)
tab2 <- map_df(list(b1, b2, b3, b4), tidy)
tab2$sig <- ""
tab2$sig[tab2$p.value <= .1]  <- "*"
tab2$sig[tab2$p.value <= .05]  <- "**"
tab2$sig[tab2$p.value <= .001] <- "***"
tab2
tab2$eval <- c("A-B","C-D","A-C","B-D")
tab2$eval <- factor(tab2$eval, levels = tab2$eval)

##graph
plot2 <- ggplot(tab2,aes(x=reorder(eval, desc(eval)), y=estimate,ymin=conf.low, ymax=conf.high)) + 
  geom_pointrange() + coord_flip() + ggtitle("Female") + xlab("") + 
  ylab("") + geom_hline(yintercept=0, linetype="dashed") + ylim(-0.3, 0.3)
plot2 <- plot2 + theme(axis.text.x = element_text(colour="grey20", size=16, hjust=.5, vjust=.5),
                       axis.text.y = element_text(colour="grey20", size=16)) + theme(plot.title = element_text(size = 16, hjust=0.5)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))  

plot2

#
fig_combined1 <- ggarrange(plot1, plot2, ncol=2, nrow=1, common.legend = TRUE, legend="bottom")
fig_combined1#Figure A11

##
j1 <- wilcox.test(xpolicewoman_gbv$legit3, xpoliceman_gbv$legit3,alternative="two.sided",exact=TRUE,conf.int=TRUE)
j2 <- wilcox.test(xpolicewoman_nongbv$legit3, xpoliceman_nongbv$legit3,alternative="two.sided",exact=TRUE,conf.int=TRUE)
j3 <- wilcox.test(xpolicewoman_gbv$legit3, xpolicewoman_nongbv$legit3,alternative="two.sided",exact=TRUE,conf.int=TRUE)
j4 <- wilcox.test(xpoliceman_gbv$legit3, xpoliceman_nongbv$legit3,alternative="two.sided",exact=TRUE,conf.int=TRUE)

wilcox_tab2 <- map_df(list(j1, j2, j3, j4), tidy)
wilcox_tab2
wilcox_tab2$star <- ""
wilcox_tab2$star[wilcox_tab2$p.value <= .1]  <- "*"
wilcox_tab2$star[wilcox_tab2$p.value <= .05]  <- "**"
wilcox_tab2$star[wilcox_tab2$p.value <= .001] <- "***"
wilcox_tab2
wilcox_tab2$eval <- c("A-B","C-D","A-C","B-D")
wilcox_tab2$eval <- factor(wilcox_tab2$eval, levels = wilcox_tab2$eval)
wilcox_tab2

#
wilcox_t <- as.data.frame(wilcox_tab)
wilcox_t <- select(wilcox_t, 1:5, 8,9)
wilcox_t2 <- as.data.frame(wilcox_tab2)
wilcox_t2 <- select(wilcox_t2, 1:5, 8,9)
#Table A18
print(xtable(wilcox_t), include.rownames=FALSE)
print(xtable(wilcox_t2), include.rownames=FALSE)

##
data2$category <- factor(data2$category,levels = c("Policeman_NonVAW", "Policeman_VAW", "Policewoman_NonVAW", "Policewoman_VAW"))

fig1 <- ggplot(data2, aes(x = category, y = legit3)) +      
  stat_summary(geom = "bar", fun = mean, position = "dodge", fill="grey") +
  stat_summary(geom = "errorbar", fun.data = mean_se, position = "dodge") +
  stat_summary(aes(label=round(..y..,3)), fun=mean, geom="text", size=4.5,hjust = -0.5)
fig1 <- fig1 + scale_fill_grey() + theme_bw() + theme(legend.title=element_blank(),axis.title.y=element_blank(),axis.title.x=element_blank())+ ggtitle("All Respondents")
fig1 <- fig1 + theme(axis.text.x = element_text(colour="grey20", size=16, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=12)) + theme(plot.title = element_text(size = 16, hjust=0.5)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black")) + coord_flip(ylim = c(0, 4.5))
fig1 <- fig1 + coord_flip(ylim = c(2, 3)) 
fig1


##Women respondents only
women <- subset(data2, data2$gender==2)#504 respondents

fig2 <- ggplot(women, aes(x = category, y = legit3)) +      
  stat_summary(geom = "bar", fun = mean, position = "dodge", fill="grey") +
  stat_summary(geom = "errorbar", fun.data = mean_se, position = "dodge") +
  stat_summary(aes(label=round(..y..,3)), fun=mean, geom="text", size=4.5,hjust = -0.5)
fig2 <- fig2 + scale_fill_grey() + theme_bw() + theme(legend.title=element_blank(),axis.title.y=element_blank(),axis.title.x=element_blank())+ ggtitle("Female")
fig2 <- fig2 + theme(axis.text.x = element_text(colour="grey20", size=16, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=12)) + theme(plot.title = element_text(size = 16, hjust=0.5)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black")) + coord_flip(ylim = c(0, 4.5))
fig2 <- fig2 + coord_flip(ylim = c(2, 3)) 
fig2

#Figure 6
ggarrange(fig1, fig2, legend = "bottom", common.legend = TRUE)


##Descriptives
#crime type
data2$gendered <- ifelse(data2$crime=="dowry"|data2$crime=="rape", 1, 0)
table(data2$gendered)

#female officer
data2$female_officer <- ifelse(data2$condition==1|data2$condition==3|data2$condition==5|data2$condition==7, 1, 0)
table(data2$female_officer)

#urban dummy (word municipal)
data2$locs  <- with(data2, paste0(urban, rural))
data2$urb <- ifelse(grepl("Municipal", data2$locs), 1, 0) 
data2$rural <- ifelse(grepl("Municipal", data2$locs), 0, 1) 

#female respondent 
data2$female <- ifelse(data2$gender==2, 1, 0)
data2$male <- ifelse(data2$gender==1, 1, 0)

#Religion
data2$hindu <- ifelse(data2$religion==1, 1, 0)
data2$buddhist <- ifelse(data2$religion==4, 1, 0)
data2$muslim <- ifelse(data2$religion==2, 1, 0)
data2$other_religions<- ifelse(data2$religion %in% c(3,5,6), 1, 0)



#Education
data2$less_hs <- ifelse(data2$education==1|data2$education==2, 1, 0)
data2$more_hs <- ifelse(data2$less_hs, 0, 1)
table(data2$less_hs)
table(data2$more_hs)
data2$less_college <- ifelse(data2$education==3|data2$education==4, 1, 0)
data2$college <- ifelse(data2$education==5, 1, 0)

table(data$caste)
table(data2$caste)

#Caste
data2$obc <- ifelse(data2$caste==1, 1, 0)
data2$sc <- ifelse(data2$caste==2, 1, 0)
data2$upper <- ifelse(data2$caste==3, 1, 0)
data2$st <- ifelse(data2$caste==4, 1, 0)
data2$other <- ifelse(data2$caste==5, 1, 0)

#friend in police
data2$friend <- ifelse(data2$friend==1, 1, 0)

#interacted with police
data2$interact <- ifelse(data2$interaction==1|data2$interaction==2, 1, 0)

#married
data2$married <- ifelse(data2$marital==2, 1, 0)

#station < 20 min
data2$close_station <- ifelse(data2$station==1, 1, 0)

#Descriptive statistics (mean, minimum, maximum, and sd) for dataset
results <- rbind(apply(!is.na(data2[,c('female_officer','gendered', 'female', 'hindu','buddhist','muslim', 'other_religions',  'upper','obc','sc', 'st', 'less_hs','less_college','college','urb','friend','interact','close_station')]),2,sum),
                 apply(data2[,c('female_officer','gendered', 'female', 'hindu','buddhist','muslim','other_religions','upper','obc','sc', 'st', 'less_hs','less_college','college','urb','friend','interact','close_station')],2,mean,na.rm=T),	
                 apply(data2[,c('female_officer','gendered', 'female', 'hindu','buddhist','muslim','other_religions','upper','obc','sc', 'st', 'less_hs','less_college','college','urb','friend','interact','close_station')],2,var,na.rm=T),	
                 apply(data2[,c('female_officer','gendered', 'female', 'hindu','buddhist','muslim','other_religions','upper','obc','sc', 'st', 'less_hs','less_college','college','urb','friend','interact','close_station')],2,sd,na.rm=T),	
                 apply(data2[,c('female_officer','gendered', 'female', 'hindu','buddhist','muslim','other_religions','upper','obc','sc', 'st', 'less_hs','less_college','college','urb','friend','interact','close_station')],2,min,na.rm=T),	
                 apply(data2[,c('female_officer','gendered', 'female', 'hindu','buddhist','muslim','other_religions','upper','obc','sc', 'st', 'less_hs','less_college','college','urb','friend','interact','close_station')],2,max,na.rm=T),	
                 apply(data2[,c('female_officer','gendered', 'female', 'hindu','buddhist','muslim','other_religions','upper','obc','sc', 'st', 'less_hs','less_college','college','urb','friend','interact','close_station')],2,sum,na.rm=T))			
rownames(results) <- c("count", "mean", "var", "sd", "min", "max", "sum")
#Table A14
t(results)
t(results[1:6,])
stargazer(results, summary=FALSE, rownames=TRUE, flip=TRUE, digits = 3)



##
newdat3 <- select(data2, 'female_officer','gendered', 'female', 'hindu','buddhist','muslim', 'other_religions', 'upper','obc','sc','st', 'less_hs','less_college','college','urb','friend','interact','close_station')

##
newdat3$female_officer <- as.factor(newdat3$female_officer)
newdat3$gendered <- as.factor(newdat3$gendered)
newdat3$female <- as.factor(newdat3$female)

#
descriptives1 <- xtable(papeR::summarize(newdat3, type = "numeric", group = "female", digits=3))
names(descriptives1)
descriptives1 <- descriptives1[,c(1,2,4,7,8,16)]

#
descriptives2 <- xtable(papeR::summarize(newdat3, type = "numeric", group = "gendered", digits=3))
names(descriptives2)
descriptives2 <- descriptives2[,c(2,4,7,8,16)]

#
descriptives3 <- xtable(papeR::summarize(newdat3, type = "numeric", group = "female_officer", digits=3))
names(descriptives3)
descriptives3 <- descriptives3[,c(2,4,7,8,16)]

##join 
test <- cbind(descriptives1, descriptives2, descriptives3)
test$female <- revalue(as.factor(test$female), c("0"="Male", "1"="Female"))
test$gendered <- revalue(as.factor(test$gendered), c("0"="Non-GBV", "1"="GBV (T)"))
test$female_officer <- revalue(as.factor(test$female_officer), c("0"="Policeman", "1"="Policewoman (T)"))
test#Table A16
print(xtable(test), include.rownames=FALSE)


####################
##Regressions
reg1 <- lm(legit3~female_officer, data=data2)
reg2 <- lm(legit3~female_officer + male, data=data2)
reg3 <- lm(legit3~female_officer + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=data2)
reg4 <- lm(legit3~female_officer + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=data2)

reg5 <- lm(legit3~female_officer*gendered, data=data2)
reg6 <- lm(legit3~female_officer*gendered + male, data=data2)
reg7 <- lm(legit3~female_officer*gendered + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=data2)
reg8 <- lm(legit3~female_officer*gendered + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=data2)

reg1 <- coeftest(reg1, vcov.=vcovHC(reg1, type="HC1")) 
reg2 <- coeftest(reg2, vcov.=vcovHC(reg2, type="HC1")) 
reg3 <- coeftest(reg3, vcov.=vcovHC(reg3, type="HC1")) 
reg4 <- coeftest(reg4, vcov.=vcovHC(reg4, type="HC1")) 
reg5 <- coeftest(reg5, vcov.=vcovHC(reg5, type="HC1")) 
reg6 <- coeftest(reg6, vcov.=vcovHC(reg6, type="HC1")) 
reg7 <- coeftest(reg7, vcov.=vcovHC(reg7, type="HC1")) 
reg8 <- coeftest(reg8, vcov.=vcovHC(reg8, type="HC1")) 

#Table 1
stargazer(reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, type = 'text', omit.stat=c("adj.rsq","ser"))
stargazer(reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, omit.stat=c("adj.rsq","ser"))

##
linearHypothesis(reg7,"female_officer = female_officer:gendered")
linearHypothesis(reg7,"gendered = female_officer:gendered")

##Respondent gender
reg1 <- lm(legit3~female_officer*male, data=data2)
reg2 <- lm(legit3~female_officer*male + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=data2)
reg3 <- lm(legit3~female_officer*male + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=data2)

reg1 <- coeftest(reg1, vcov.=vcovHC(reg1, type="HC1")) 
reg2 <- coeftest(reg2, vcov.=vcovHC(reg2, type="HC1")) 
reg3 <- coeftest(reg3, vcov.=vcovHC(reg3, type="HC1")) 

#Table A22
stargazer(reg1, reg2, reg3, type = 'text', omit.stat=c("adj.rsq","ser"))
stargazer(reg1, reg2, reg3, omit.stat=c("adj.rsq","ser"))

##Male and Female Respondents
men <- subset(data2, data2$gender==1)

reg1 <- lm(legit3~female_officer, data=men)
reg2 <- lm(legit3~female_officer + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=men)
reg3 <- lm(legit3~female_officer + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=men)

reg4 <- lm(legit3~female_officer*gendered, data=men)
reg5 <- lm(legit3~female_officer*gendered + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=men)
reg6 <- lm(legit3~female_officer*gendered + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=men)

reg1 <- coeftest(reg1, vcov.=vcovHC(reg1, type="HC1")) 
reg2 <- coeftest(reg2, vcov.=vcovHC(reg2, type="HC1")) 
reg3 <- coeftest(reg3, vcov.=vcovHC(reg3, type="HC1")) 
reg4 <- coeftest(reg4, vcov.=vcovHC(reg4, type="HC1")) 
reg5 <- coeftest(reg5, vcov.=vcovHC(reg5, type="HC1")) 
reg6 <- coeftest(reg6, vcov.=vcovHC(reg6, type="HC1")) 

#Table A21
stargazer(reg1, reg2, reg3, reg4, reg5, reg6, type = 'text', omit.stat=c("adj.rsq","ser"))
stargazer(reg1, reg2, reg3, reg4, reg5, reg6, omit.stat=c("adj.rsq","ser"))

linearHypothesis(reg6,"female_officer = female_officer:gendered")
linearHypothesis(reg6,"gendered = female_officer:gendered")

##Female
women <- subset(data2, data2$gender==2)

reg1 <- lm(legit3~female_officer, data=women)
reg2 <- lm(legit3~female_officer + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=women)
reg3 <- lm(legit3~female_officer + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=women)

reg4 <- lm(legit3~female_officer*gendered, data=women)
reg5 <- lm(legit3~female_officer*gendered + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=women)
reg6 <- lm(legit3~female_officer*gendered + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=women)

reg1 <- coeftest(reg1, vcov.=vcovHC(reg1, type="HC1")) 
reg2 <- coeftest(reg2, vcov.=vcovHC(reg2, type="HC1")) 
reg3 <- coeftest(reg3, vcov.=vcovHC(reg3, type="HC1")) 
reg4 <- coeftest(reg4, vcov.=vcovHC(reg4, type="HC1")) 
reg5 <- coeftest(reg5, vcov.=vcovHC(reg5, type="HC1")) 
reg6 <- coeftest(reg6, vcov.=vcovHC(reg6, type="HC1")) 

#Table A20
stargazer(reg1, reg2, reg3, reg4, reg5, reg6, type = 'text', omit.stat=c("adj.rsq","ser"))
stargazer(reg1, reg2, reg3, reg4, reg5, reg6, omit.stat=c("adj.rsq","ser"))

linearHypothesis(reg6,"female_officer = female_officer:gendered")
linearHypothesis(reg6,"gendered = female_officer:gendered")


##Differences
plot2 #female

#Male respondents
men <- subset(data2, data2$gender==1)
xpolicewoman_gbv <- subset(men, men$category=="Policewoman_VAW")
xpolicewoman_nongbv <- subset(men, men$category=="Policewoman_NonVAW")
xpoliceman_gbv <- subset(men, men$category=="Policeman_VAW")
xpoliceman_nongbv <- subset(men, men$category=="Policeman_NonVAW")

##
b1 <- t.test(xpolicewoman_gbv$legit3, xpoliceman_gbv$legit3)
b2 <- t.test(xpolicewoman_nongbv$legit3, xpoliceman_nongbv$legit3)
b3 <- t.test(xpolicewoman_gbv$legit3, xpolicewoman_nongbv$legit3)
b4 <- t.test(xpoliceman_gbv$legit3, xpoliceman_nongbv$legit3)
tab2 <- map_df(list(b1, b2, b3, b4), tidy)
tab2$sig <- ""
tab2$sig[tab2$p.value <= .1]  <- "*"
tab2$sig[tab2$p.value <= .05]  <- "**"
tab2$sig[tab2$p.value <= .001] <- "***"
tab2
tab2$eval <- c("A-B","C-D","A-C","B-D")
tab2$eval <- factor(tab2$eval, levels = tab2$eval)

##
plot22 <- ggplot(tab2,aes(x=reorder(eval, desc(eval)), y=estimate,ymin=conf.low, ymax=conf.high)) + 
  geom_pointrange() + coord_flip() + ggtitle("Male") + xlab("") + 
  ylab("") + geom_hline(yintercept=0, linetype="dashed") + ylim(-0.3, 0.3)
plot22 <- plot22 + theme(axis.text.x = element_text(colour="grey20", size=16, hjust=.5, vjust=.5),
                         axis.text.y = element_text(colour="grey20", size=16)) + theme(plot.title = element_text(size = 16, hjust=0.5)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))  

plot22

##Urban
data2$locs  <- with(data2, paste0(urban, rural))
data2$urb <- ifelse(grepl("Municipal", data2$locs), 1, 0) 
data2$rural <- ifelse(grepl("Municipal", data2$locs), 0, 1) 

urb_resp <- subset(data2, data2$urb==1)

xpolicewoman_gbv <- subset(urb_resp, urb_resp$category=="Policewoman_VAW")
xpolicewoman_nongbv <- subset(urb_resp, urb_resp$category=="Policewoman_NonVAW")
xpoliceman_gbv <- subset(urb_resp, urb_resp$category=="Policeman_VAW")
xpoliceman_nongbv <- subset(urb_resp, urb_resp$category=="Policeman_NonVAW")

##
b1 <- t.test(xpolicewoman_gbv$legit3, xpoliceman_gbv$legit3)
b2 <- t.test(xpolicewoman_nongbv$legit3, xpoliceman_nongbv$legit3)
b3 <- t.test(xpolicewoman_gbv$legit3, xpolicewoman_nongbv$legit3)
b4 <- t.test(xpoliceman_gbv$legit3, xpoliceman_nongbv$legit3)
tab2 <- map_df(list(b1, b2, b3, b4), tidy)
tab2$sig <- ""
tab2$sig[tab2$p.value <= .1]  <- "*"
tab2$sig[tab2$p.value <= .05]  <- "**"
tab2$sig[tab2$p.value <= .001] <- "***"
tab2
tab2$eval <- c("A-B","C-D","A-C","B-D")
tab2$eval <- factor(tab2$eval, levels = tab2$eval)

##
plot33 <- ggplot(tab2,aes(x=reorder(eval, desc(eval)), y=estimate,ymin=conf.low, ymax=conf.high)) + 
  geom_pointrange() + coord_flip() + ggtitle("Urban") + xlab("") + 
  ylab("") + geom_hline(yintercept=0, linetype="dashed") + ylim(-0.3, 0.3)
plot33 <- plot33 + theme(axis.text.x = element_text(colour="grey20", size=16, hjust=.5, vjust=.5),
                         axis.text.y = element_text(colour="grey20", size=16)) + theme(plot.title = element_text(size = 16, hjust=0.5)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))  

plot33

#Rural 
rur <- subset(data2, data2$urb==0)

xpolicewoman_gbv <- subset(rur, rur$category=="Policewoman_VAW")
xpolicewoman_nongbv <- subset(rur, rur$category=="Policewoman_NonVAW")
xpoliceman_gbv <- subset(rur, rur$category=="Policeman_VAW")
xpoliceman_nongbv <- subset(rur, rur$category=="Policeman_NonVAW")

##
b1 <- t.test(xpolicewoman_gbv$legit3, xpoliceman_gbv$legit3)
b2 <- t.test(xpolicewoman_nongbv$legit3, xpoliceman_nongbv$legit3)
b3 <- t.test(xpolicewoman_gbv$legit3, xpolicewoman_nongbv$legit3)
b4 <- t.test(xpoliceman_gbv$legit3, xpoliceman_nongbv$legit3)
tab2 <- map_df(list(b1, b2, b3, b4), tidy)
tab2$sig <- ""
tab2$sig[tab2$p.value <= .1]  <- "*"
tab2$sig[tab2$p.value <= .05]  <- "**"
tab2$sig[tab2$p.value <= .001] <- "***"
tab2
tab2$eval <- c("A-B","C-D","A-C","B-D")
tab2$eval <- factor(tab2$eval, levels = tab2$eval)

##
plot44 <- ggplot(tab2,aes(x=reorder(eval, desc(eval)), y=estimate,ymin=conf.low, ymax=conf.high)) + 
  geom_pointrange() + coord_flip() + ggtitle("Rural") + xlab("") + 
  ylab("") + geom_hline(yintercept=0, linetype="dashed") + ylim(-0.3, 0.3)
plot44 <- plot44 + theme(axis.text.x = element_text(colour="grey20", size=16, hjust=.5, vjust=.5),
                         axis.text.y = element_text(colour="grey20", size=16)) + theme(plot.title = element_text(size = 16, hjust=0.5)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))  

plot44

##Less hs
data2$less_hs <- ifelse(data2$education==1|data2$education==2, 1, 0)

edu <- subset(data2, data2$less_hs==1)

xpolicewoman_gbv <- subset(edu, edu$category=="Policewoman_VAW")
xpolicewoman_nongbv <- subset(edu, edu$category=="Policewoman_NonVAW")
xpoliceman_gbv <- subset(edu, edu$category=="Policeman_VAW")
xpoliceman_nongbv <- subset(edu, edu$category=="Policeman_NonVAW")

##
b1 <- t.test(xpolicewoman_gbv$legit3, xpoliceman_gbv$legit3)
b2 <- t.test(xpolicewoman_nongbv$legit3, xpoliceman_nongbv$legit3)
b3 <- t.test(xpolicewoman_gbv$legit3, xpolicewoman_nongbv$legit3)
b4 <- t.test(xpoliceman_gbv$legit3, xpoliceman_nongbv$legit3)
tab2 <- map_df(list(b1, b2, b3, b4), tidy)
tab2$sig <- ""
tab2$sig[tab2$p.value <= .1]  <- "*"
tab2$sig[tab2$p.value <= .05]  <- "**"
tab2$sig[tab2$p.value <= .001] <- "***"
tab2
tab2$eval <- c("A-B","C-D","A-C","B-D")
tab2$eval <- factor(tab2$eval, levels = tab2$eval)

##
plot55 <- ggplot(tab2,aes(x=reorder(eval, desc(eval)), y=estimate,ymin=conf.low, ymax=conf.high)) + 
  geom_pointrange() + coord_flip() + ggtitle("< HS Education") + xlab("") + 
  ylab("") + geom_hline(yintercept=0, linetype="dashed") + ylim(-0.3, 0.3)
plot55 <- plot55 + theme(axis.text.x = element_text(colour="grey20", size=16, hjust=.5, vjust=.5),
                         axis.text.y = element_text(colour="grey20", size=16)) + theme(plot.title = element_text(size = 16, hjust=0.5)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))  

plot55

#More HS
edu <- subset(data2, data2$less_hs==0)

xpolicewoman_gbv <- subset(edu, edu$category=="Policewoman_VAW")
xpolicewoman_nongbv <- subset(edu, edu$category=="Policewoman_NonVAW")
xpoliceman_gbv <- subset(edu, edu$category=="Policeman_VAW")
xpoliceman_nongbv <- subset(edu, edu$category=="Policeman_NonVAW")

##
b1 <- t.test(xpolicewoman_gbv$legit3, xpoliceman_gbv$legit3)
b2 <- t.test(xpolicewoman_nongbv$legit3, xpoliceman_nongbv$legit3)
b3 <- t.test(xpolicewoman_gbv$legit3, xpolicewoman_nongbv$legit3)
b4 <- t.test(xpoliceman_gbv$legit3, xpoliceman_nongbv$legit3)
tab2 <- map_df(list(b1, b2, b3, b4), tidy)
tab2$sig <- ""
tab2$sig[tab2$p.value <= .1]  <- "*"
tab2$sig[tab2$p.value <= .05]  <- "**"
tab2$sig[tab2$p.value <= .001] <- "***"
tab2
tab2$eval <- c("A-B","C-D","A-C","B-D")
tab2$eval <- factor(tab2$eval, levels = tab2$eval)

##
plot66 <- ggplot(tab2,aes(x=reorder(eval, desc(eval)), y=estimate,ymin=conf.low, ymax=conf.high)) + 
  geom_pointrange() + coord_flip() + ggtitle("> HS Education") + xlab("") + 
  ylab("") + geom_hline(yintercept=0, linetype="dashed") + ylim(-0.3, 0.3)
plot66 <- plot66 + theme(axis.text.x = element_text(colour="grey20", size=16, hjust=.5, vjust=.5),
                         axis.text.y = element_text(colour="grey20", size=16)) + theme(plot.title = element_text(size = 16, hjust=0.5)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))  

plot66

##Figure 7
ggarrange(plot22, plot2, plot33,plot44,plot55,plot66, ncol=2, nrow=3)


##Exploring mechanisms
data2$crime <- as.factor(data2$crime)

reg1 <- lm(legit3~relevel(data2$crime, ref = "murder")*female_officer, data=data2)
reg2 <- lm(legit3~relevel(data2$crime, ref = "murder")*female_officer + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=data2)
reg3 <- lm(legit3~relevel(data2$crime, ref = "murder")*female_officer + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=data2)

reg1 <- coeftest(reg1, vcov.=vcovHC(reg1, type="HC1")) 
reg2 <- coeftest(reg2, vcov.=vcovHC(reg2, type="HC1")) 
reg3 <- coeftest(reg3, vcov.=vcovHC(reg3, type="HC1")) 

#Table A25
stargazer(reg1, reg2, reg3, type = 'text', omit = c("urb", "less_hs","obc","sc","\\bst\\b","upper","buddhist","muslim","friend","interact","\\bmale\\b"), omit.stat=c("adj.rsq","ser"))
stargazer(reg1, reg2, reg3, omit = c("urb", "less_hs","obc","sc","\\bst\\b","upper","buddhist","muslim","friend","interact","\\bmale\\b"), omit.stat=c("adj.rsq","ser"))

##
#Now also include subindices of trust,fairness, efficacy. 
reg01 <- lm(trustworthy3~female_officer*gendered, data=data2)
reg02 <- lm(trustworthy3~female_officer*gendered + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=data2)
reg03 <- lm(trustworthy3~female_officer*gendered + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=data2)

reg1 <- lm(fair3~female_officer*gendered, data=data2)
reg2 <- lm(fair3~female_officer*gendered + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=data2)
reg3 <- lm(fair3~female_officer*gendered + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=data2)

reg4 <- lm(effective3~female_officer*gendered, data=data2)
reg5 <- lm(effective3~female_officer*gendered + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=data2)
reg6 <- lm(effective3~female_officer*gendered + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=data2)

reg01 <- coeftest(reg01, vcov.=vcovHC(reg01, type="HC1")) 
reg02 <- coeftest(reg02, vcov.=vcovHC(reg02, type="HC1")) 
reg03 <- coeftest(reg03, vcov.=vcovHC(reg03, type="HC1")) 
reg1 <- coeftest(reg1, vcov.=vcovHC(reg1, type="HC1")) 
reg2 <- coeftest(reg2, vcov.=vcovHC(reg2, type="HC1")) 
reg3 <- coeftest(reg3, vcov.=vcovHC(reg3, type="HC1")) 
reg4 <- coeftest(reg4, vcov.=vcovHC(reg4, type="HC1")) 
reg5 <- coeftest(reg5, vcov.=vcovHC(reg5, type="HC1")) 
reg6 <- coeftest(reg6, vcov.=vcovHC(reg6, type="HC1")) 

#Table A26
stargazer(reg01, reg02, reg03, reg1, reg2, reg3, reg4, reg5, reg6, type = 'text', omit = c("urb", "less_hs","obc","sc","\\bst\\b","upper","buddhist","muslim","friend","interact"), omit.stat=c("adj.rsq","ser"))
stargazer(reg01, reg02, reg03, reg1, reg2, reg3, reg4, reg5, reg6, omit = c("urb", "less_hs","obc","sc","\\bst\\b","upper","buddhist","muslim","friend","interact"), omit.stat=c("adj.rsq","ser"))

##
linearHypothesis(reg03,"female_officer = female_officer:gendered")
linearHypothesis(reg03,"gendered = female_officer:gendered")

linearHypothesis(reg3,"female_officer = female_officer:gendered")
linearHypothesis(reg3,"gendered = female_officer:gendered")

linearHypothesis(reg6,"female_officer = female_officer:gendered")
linearHypothesis(reg6,"gendered = female_officer:gendered")


###Manipulation checks
data2$pass_1 <- ifelse(grepl("dowry|rape", data2$crime) == grepl("1|4", data2$manip1) & grepl("murder|kidnapping", data2$crime) == grepl("2|3", data2$manip1), "pass", "fail")

data2$pass_1 <- ifelse(grepl(1, data2$gendered) == grepl("1|4", data2$manip1) & grepl(0, data2$gendered) == grepl("2|3", data2$manip1), "pass", "fail")

#
data2$pass_2 <- ifelse(grepl(1, data2$female_officer) == grepl("2", data2$manip2) & grepl(0, data2$female_officer) == grepl("1", data2$manip2), "pass", "fail")

##
passing1 <- subset(data2, data2$pass_1=="pass")

##Regressions
reg1 <- lm(legit3~female_officer, data=passing1)
reg2 <- lm(legit3~female_officer + male, data=passing1)
reg3 <- lm(legit3~female_officer + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=passing1)
reg4 <- lm(legit3~female_officer + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=passing1)

reg5 <- lm(legit3~female_officer*gendered, data=passing1)
reg6 <- lm(legit3~female_officer*gendered + male, data=passing1)
reg7 <- lm(legit3~female_officer*gendered + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=passing1)
reg8 <- lm(legit3~female_officer*gendered + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=passing1)

reg1 <- coeftest(reg1, vcov.=vcovHC(reg1, type="HC1")) 
reg2 <- coeftest(reg2, vcov.=vcovHC(reg2, type="HC1")) 
reg3 <- coeftest(reg3, vcov.=vcovHC(reg3, type="HC1")) 
reg4 <- coeftest(reg4, vcov.=vcovHC(reg4, type="HC1")) 
reg5 <- coeftest(reg5, vcov.=vcovHC(reg5, type="HC1")) 
reg6 <- coeftest(reg6, vcov.=vcovHC(reg6, type="HC1")) 
reg7 <- coeftest(reg7, vcov.=vcovHC(reg7, type="HC1")) 
reg8 <- coeftest(reg8, vcov.=vcovHC(reg8, type="HC1")) 

#Table A23
stargazer(reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, type = 'text', omit.stat=c("adj.rsq","ser"))
stargazer(reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, omit.stat=c("adj.rsq","ser"))

##testing significance between the two values
linearHypothesis(reg7,"female_officer = female_officer:gendered")
linearHypothesis(reg7,"gendered = female_officer:gendered")

##passing2
passing2 <- subset(data2, data2$pass_2=="pass")
##Regressions
reg1 <- lm(legit3~female_officer, data=passing2)
reg2 <- lm(legit3~female_officer + male, data=passing2)
reg3 <- lm(legit3~female_officer + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=passing2)
reg4 <- lm(legit3~female_officer + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=passing2)

reg5 <- lm(legit3~female_officer*gendered, data=passing2)
reg6 <- lm(legit3~female_officer*gendered + male, data=passing2)
reg7 <- lm(legit3~female_officer*gendered + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=passing2)
reg8 <- lm(legit3~female_officer*gendered + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=passing2)

reg1 <- coeftest(reg1, vcov.=vcovHC(reg1, type="HC1")) 
reg2 <- coeftest(reg2, vcov.=vcovHC(reg2, type="HC1")) 
reg3 <- coeftest(reg3, vcov.=vcovHC(reg3, type="HC1")) 
reg4 <- coeftest(reg4, vcov.=vcovHC(reg4, type="HC1")) 
reg5 <- coeftest(reg5, vcov.=vcovHC(reg5, type="HC1")) 
reg6 <- coeftest(reg6, vcov.=vcovHC(reg6, type="HC1")) 
reg7 <- coeftest(reg7, vcov.=vcovHC(reg7, type="HC1")) 
reg8 <- coeftest(reg8, vcov.=vcovHC(reg8, type="HC1")) 

#Table A24
stargazer(reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, type = 'text', omit.stat=c("adj.rsq","ser"))
stargazer(reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, omit.stat=c("adj.rsq","ser"))

##testing significance between the two values
linearHypothesis(reg7,"female_officer = female_officer:gendered")
linearHypothesis(reg7,"gendered = female_officer:gendered")

##################################
##Breakdown by each question 
names(data2)
test <- select(policewoman_gbv, 112:125)
descriptives1 <- xtable(papeR::summarize(test, digits = 3))
names(descriptives1)
descriptives1 <- descriptives1[,c(1,5,6,10)]

test <- select(policewoman_nongbv, 112:125)
descriptives2 <- xtable(papeR::summarize(test, digits = 3))
names(descriptives2)
descriptives2 <- descriptives2[,c(1,5,6,10)]

test <- select(policeman_gbv, 112:125)
descriptives3 <- xtable(papeR::summarize(test, digits = 3))
names(descriptives3)
descriptives3 <- descriptives3[,c(1,5,6,10)]

test <- select(policeman_nongbv, 112:125)
descriptives4 <- xtable(papeR::summarize(test, digits = 3))
names(descriptives4)
descriptives4 <- descriptives4[,c(1,5,6,10)]

final <- cbind(descriptives1, descriptives2, descriptives3, descriptives4)
print(xtable(final), include.rownames=FALSE, digits=3)#Table A17 (A)

##Female respondents 
women <- subset(data2, data2$gender==2)
xpolicewoman_gbv <- subset(women, women$category=="Policewoman_VAW")
xpolicewoman_nongbv <- subset(women, women$category=="Policewoman_NonVAW")
xpoliceman_gbv <- subset(women, women$category=="Policeman_VAW")
xpoliceman_nongbv <- subset(women, women$category=="Policeman_NonVAW")


test <- select(xpolicewoman_gbv, 112:125)
descriptives1 <- xtable(papeR::summarize(test, digits = 3))
names(descriptives1)
descriptives1 <- descriptives1[,c(1,5,6,10)]

test <- select(xpolicewoman_nongbv, 112:125)
descriptives2 <- xtable(papeR::summarize(test, digits = 3))
names(descriptives2)
descriptives2 <- descriptives2[,c(1,5,6,10)]

test <- select(xpoliceman_gbv, 112:125)
descriptives3 <- xtable(papeR::summarize(test, digits = 3))
names(descriptives3)
descriptives3 <- descriptives3[,c(1,5,6,10)]

test <- select(xpoliceman_nongbv, 112:125)
descriptives4 <- xtable(papeR::summarize(test, digits = 3))
names(descriptives4)
descriptives4 <- descriptives4[,c(1,5,6,10)]

final2 <- cbind(descriptives1, descriptives2, descriptives3, descriptives4)
print(xtable(final2), include.rownames=FALSE, digits=3)#Table A17 (B)

#################
##Subindices of trust, fairness, efficacy
r1 <- t.test(policewoman_gbv$trustworthy, policeman_gbv$trustworthy)
r2 <- t.test(policewoman_gbv$fair, policeman_gbv$fair)
r3 <- t.test(policewoman_gbv$effective, policeman_gbv$effective)

tab5 <- map_df(list(r1, r2, r3), tidy)
tab5
setnames(tab5, old=c("estimate1","estimate2", "estimate"), new=c("Policewoman VAW", "Policeman VAW", "diff"))
tab5
tab5$star <- ""
tab5$star[tab5$p.value <= .05]  <- "*"
tab5$star[tab5$p.value <= .01]  <- "**"
tab5$star[tab5$p.value <= .001] <- "***"
tab5
tab5$eval <- c("Trustworthy","Fair","Effective")
tab5$eval <- factor(tab5$eval, levels = tab5$eval)

##
plot5 <- ggplot(tab5,aes(x=reorder(eval, desc(eval)), y=diff,ymin=conf.low, ymax=conf.high)) + 
  geom_pointrange() + coord_flip() + ggtitle("Policewoman VAW - Policeman VAW") + xlab("") + 
  ylab("") + geom_hline(yintercept=0, linetype="dashed") + ylim(-2.5, 1)
plot5 <- plot5 + theme(axis.text.x = element_text(colour="grey20", size=12, hjust=.5, vjust=.5),
              axis.text.y = element_text(colour="grey20", size=12)) + theme(plot.title = element_text(size = 12)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))  
plot5


r4 <- t.test(policewoman_gbv$trustworthy, policewoman_nongbv$trustworthy)
r5 <- t.test(policewoman_gbv$fair, policewoman_nongbv$fair)
r6 <- t.test(policewoman_gbv$effective, policewoman_nongbv$effective)

tab5 <- map_df(list(r4, r5, r6), tidy)
tab5
setnames(tab5, old=c("estimate1","estimate2", "estimate"), new=c("Policewoman VAW", "Policewoman NonVAW", "diff"))
tab5
tab5$star <- ""
tab5$star[tab5$p.value <= .05]  <- "*"
tab5$star[tab5$p.value <= .01]  <- "**"
tab5$star[tab5$p.value <= .001] <- "***"
tab5
tab5$eval <- c("Trustworthy","Fair","Effective")
tab5$eval <- factor(tab5$eval, levels = tab5$eval)

##
plot6 <- ggplot(tab5,aes(x=reorder(eval, desc(eval)), y=diff,ymin=conf.low, ymax=conf.high)) + 
  geom_pointrange() + coord_flip() + ggtitle("Policewoman VAW - Policewoman NonVAW") + xlab("") + 
  ylab("") + geom_hline(yintercept=0, linetype="dashed") + ylim(-2.5, 1)
plot6 <- plot6 + theme(axis.text.x = element_text(colour="grey20", size=12, hjust=.5, vjust=.5),
                       axis.text.y = element_text(colour="grey20", size=12)) + theme(plot.title = element_text(size = 12)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))  
plot6

ggarrange(plot5,plot6)


##Female Respondents
r7 <- t.test(xpolicewoman_gbv$trustworthy, xpoliceman_gbv$trustworthy)
r8 <- t.test(xpolicewoman_gbv$fair, xpoliceman_gbv$fair)
r9 <- t.test(xpolicewoman_gbv$effective, xpoliceman_gbv$effective)

tab5 <- map_df(list(r7, r8, r9), tidy)
tab5
setnames(tab5, old=c("estimate1","estimate2", "estimate"), new=c("Policewoman VAW", "Policeman VAW", "diff"))
tab5
tab5$star <- ""
tab5$star[tab5$p.value <= .05]  <- "*"
tab5$star[tab5$p.value <= .01]  <- "**"
tab5$star[tab5$p.value <= .001] <- "***"
tab5
tab5$eval <- c("Trustworthy","Fair","Effective")
tab5$eval <- factor(tab5$eval, levels = tab5$eval)

##
plot7 <- ggplot(tab5,aes(x=reorder(eval, desc(eval)), y=diff,ymin=conf.low, ymax=conf.high)) + 
  geom_pointrange() + coord_flip() + ggtitle("Policewoman VAW - Policeman VAW (Female)") + xlab("") + 
  ylab("") + geom_hline(yintercept=0, linetype="dashed") + ylim(-2.5, 1)
plot7 <- plot7 + theme(axis.text.x = element_text(colour="grey20", size=12, hjust=.5, vjust=.5),
                       axis.text.y = element_text(colour="grey20", size=12)) + theme(plot.title = element_text(size = 12)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))  
plot7


r7 <- t.test(xpolicewoman_gbv$trustworthy, xpolicewoman_nongbv$trustworthy)
r8 <- t.test(xpolicewoman_gbv$fair, xpolicewoman_nongbv$fair)
r9 <- t.test(xpolicewoman_gbv$effective, xpolicewoman_nongbv$effective)

tab5 <- map_df(list(r7, r8, r9), tidy)
tab5
setnames(tab5, old=c("estimate1","estimate2", "estimate"), new=c("policewomanVAW", "policewomanNonVAW", "diff"))
tab5
tab5$star <- ""
tab5$star[tab5$p.value <= .05]  <- "*"
tab5$star[tab5$p.value <= .01]  <- "**"
tab5$star[tab5$p.value <= .001] <- "***"
tab5
tab5$eval <- c("Trustworthy","Fair","Effective")
tab5$eval <- factor(tab5$eval, levels = tab5$eval)

##
plot8 <- ggplot(tab5,aes(x=reorder(eval, desc(eval)), y=diff,ymin=conf.low, ymax=conf.high)) + 
  geom_pointrange() + coord_flip() + ggtitle("Policewoman VAW - Policewoman NonVAW (Female)") + xlab("") + 
  ylab("") + geom_hline(yintercept=0, linetype="dashed") + ylim(-2.5,1)
plot8 <- plot8 + theme(axis.text.x = element_text(colour="grey20", size=12, hjust=.5, vjust=.5),
                       axis.text.y = element_text(colour="grey20", size=12)) + theme(plot.title = element_text(size = 12)) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))  
plot8

#Figure 8
ggarrange(plot5,plot6,plot7,plot8)


##PCA
myPr <- prcomp(~trust+trust2+corrupt+torture+abuse+bias+comp+lawful+compromise+catch+fear+prompt+interrogate+time, center=TRUE, scale=TRUE, na.action = na.exclude, data=data2)
myPr
summary(myPr)
str(myPr)
myPr$x

##attach pca to dat
data3 <- cbind(data2, myPr$x[,1:4])##

##
reg1 <- lm(PC1~female_officer, data=data3)
reg2 <- lm(PC1~female_officer + male, data=data3)
reg3 <- lm(PC1~female_officer + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=data3)
reg4 <- lm(PC1~female_officer + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=data3)

reg5 <- lm(PC1~female_officer*gendered, data=data3)
reg6 <- lm(PC1~female_officer*gendered + male, data=data3)
reg7 <- lm(PC1~female_officer*gendered + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim, data=data3)
reg8 <- lm(PC1~female_officer*gendered + male + urb + less_hs + obc + sc + st + upper + buddhist + muslim + friend + interact, data=data3)

reg1 <- coeftest(reg1, vcov.=vcovHC(reg1, type="HC1")) 
reg2 <- coeftest(reg2, vcov.=vcovHC(reg2, type="HC1")) 
reg3 <- coeftest(reg3, vcov.=vcovHC(reg3, type="HC1")) 
reg4 <- coeftest(reg4, vcov.=vcovHC(reg4, type="HC1")) 
reg5 <- coeftest(reg5, vcov.=vcovHC(reg5, type="HC1")) 
reg6 <- coeftest(reg6, vcov.=vcovHC(reg6, type="HC1")) 
reg7 <- coeftest(reg7, vcov.=vcovHC(reg7, type="HC1")) 
reg8 <- coeftest(reg8, vcov.=vcovHC(reg8, type="HC1")) 

#Table A19
stargazer(reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, type = 'text', omit.stat=c("adj.rsq","ser"))
stargazer(reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, omit.stat=c("adj.rsq","ser"))


####Other Survey Questions
##Handle the crime?
perception_a <- data2 %>% freq_table(gendered, handle)

fig1 <- ggplot(perception_a, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.5), width = 0.4) +
  geom_text(aes(label = round(percent_row,1)), vjust=-2.5, position = position_dodge(0.5))
fig1 <- fig1 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
fig1 <- fig1 + theme(plot.title = element_text(size = 14, hjust=0.5)) + ggtitle("In your opinion, who is best suited to tackle the crime?") 
fig1 <- fig1 + theme(axis.text.x = element_text(colour="grey20", size=15, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=15)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 75, 5), limits=c(0, 75)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.5), width = 0.4)
fig1 <- fig1 + scale_fill_manual(values = c("gray71", "grey40")) + scale_x_discrete(labels=c("Police", "Family", "CBI", "Village Leaders"))
fig1

##Bail/Arrest?
perception_a <- data2 %>% freq_table(gendered, bail)

fig2 <- ggplot(perception_a, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.5), width = 0.4) +
  geom_text(aes(label = round(percent_row,1)), vjust=-1.5, position = position_dodge(0.5))
fig2 <- fig2 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
fig2 <- fig2 + theme(plot.title = element_text(size = 14, hjust=0.5)) + ggtitle("Should the perpetrator be arrested with bail, \nwithout bail, or not arrested?") 
fig2 <- fig2 + theme(axis.text.x = element_text(colour="grey20", size=15, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=15)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 100, 10), limits=c(0, 100)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.5), width = 0.4)
fig2 <- fig2 + scale_fill_manual(values = c("gray71", "grey40")) + scale_x_discrete(labels=c("Without Bail", "With Bail", "Not Arrested"))
fig2

##Mob
perception_a <- data2 %>% freq_table(gendered, mob)

fig3 <- ggplot(perception_a, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.5), width = 0.4) +
  geom_text(aes(label = round(percent_row,1)), vjust=-2.5, position = position_dodge(0.5))
fig3 <- fig3 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
fig3 <- fig3 + theme(plot.title = element_text(size = 12, hjust=0.5)) + ggtitle("Suppose the victim's friends caught the suspect before SHO Kumar. \nShould they beat the accused or contact SHO Kumar and let the police handle the situation?") 
fig3 <- fig3 + theme(axis.text.x = element_text(colour="grey20", size=15, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=15)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 80, 5), limits=c(0, 80)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.5), width = 0.4)
fig3 <- fig3 + scale_fill_manual(values = c("gray71", "grey40")) + scale_x_discrete(labels=c("Beat him", "Contact the SHO"))
fig3

#
fig_combined1 <- ggarrange(fig1, fig2, ncol=2, nrow=1,common.legend = TRUE, legend="bottom")
fig_combined1 <- annotate_figure(fig_combined1, left = "Percent")
fig_combined1#Figure A12


###########
perception_c <- data2 %>% freq_table(gender, type)
perception_c <- na.omit(perception_c)

fig3 <- ggplot(perception_c, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.5), width = 0.4) +
  geom_text(aes(label = round(percent_row,1)), vjust=-2.5, position = position_dodge(0.5))
fig3 <- fig3 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
fig3 <- fig3 + theme(plot.title = element_text(size = 15, hjust=0.5)) + ggtitle("")
fig3 <- fig3 + theme(axis.text.x = element_text(colour="grey20", size=15, hjust=.5, vjust=-0.2),
                     axis.text.y = element_text(colour="grey20", size=15)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 95, 10), limits=c(0, 95)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.5), width = 0.4)
fig3 <- fig3 + scale_fill_manual(values = c("gray71", "grey40"), labels=c("Male Respondents","Female Respondents")) + theme(legend.position="bottom")
fig3 <- fig3 + scale_x_discrete(labels=c("Only Gendered Crime", "All Crime", "No Crime"))
fig3 <- fig3 + ggtitle("Some people think policewomen should investigate only gendered crime like dowry or rape.\nOther people think women should investigate all crime, even dacoity and terrorism. \nWhat do you think?") 
fig3#Figure A13



##Gender representation
#loading the women_bprd dataset
women_bprd <- read.csv("gender_representation_bprd.csv")

##2008 and 2017
jump_women <- data.frame(women_bprd$YEAR, women_bprd$STATE.UT, women_bprd$X..Women)
#
perc_jump <- subset(jump_women, women_bprd$YEAR == 2008 | women_bprd$YEAR == 2017)
perc_jump

dat1 <- subset(perc_jump, perc_jump$women_bprd.YEAR == "2008")

tenyears1 <- subset(perc_jump, women_bprd.YEAR=="2008")
tenyears2 <- subset(perc_jump, women_bprd.YEAR=="2017")
names(tenyears1) <- c("year", "state", "percent1")
names(tenyears2) <- c("year2", "state", "percent2")

dat  <- merge(tenyears1, tenyears2, by = "state")
dat$diff <- dat$percent2 - dat$percent1

dat2  <- select(dat, state, year2, diff)

names(dat1) <- c("year2", "state", "diff")

final <- rbind(dat1, dat2)

#
final$diff[final$diff <= -0.0000000000000000000001] <- 0

library(tools)

final$state <- as.character(final$state)
final$state <- sapply(final$state, tolower)
final$state <- toTitleCase(final$state)

figure_2 <- ggplot(data=final, aes(x=state, y=diff, fill=factor(year2, levels = c("2017","2008")))) + geom_bar(stat="identity") 
figure_2 <- figure_2 + coord_flip() + theme(plot.title = element_text(size = 20, face = "bold", hjust=0.5)) + 
  ggtitle("Gender Representation in the Indian Police in 2008 & 2017 (Bureau of Police Research & Development)") + geom_vline(aes(xintercept = 32), linetype="dashed") +
  xlab("") + ylab("Percent") + theme(legend.title = element_blank()) + 
  theme(axis.text.y = element_text(size = 15)) + scale_fill_manual(values=c("grey74", "grey26")) 

figure_2#Figure A1




###Reviewer 1: is one of the photo's more "frightening" than the other?
frightening <- read_excel("frightening_online_exp_data.xls")
names(frightening)

frightening$score <- frightening$`Frightening score 0 to 10`
frightening$policewoman <- frightening$`Photo assigned is policewoman`

reg <- lm(score~policewoman, data=frightening)
summary(reg)

######################################################################################
##
#CSDS-Common Cause Survey 2017


library(ggplot2);library(plyr);library(dplyr);library(ggsignif);library(reshape2);library(reshape2)
library(gridExtra);library(sandwich);library(lmtest);library(data.table);library(stargazer);library(psych);library(readxl)
library(broom);library(purrr);library(ggpubr);library(papeR);library(readr);library(REdaS);library(freqtables);library(ggplotify)


##Load proprietary CSDS-Lokniti. This can be purchased/accessed by permission at:
#https://www.lokniti.org/otherstudies/lokniti-csds-common-cause-status-of-policing-in-india-20
#https://www.lokniti.org/page/accessing-data

##Load full datasets here (numeric and character)
data <- read_excel("Lokniti_CSDS Police Study 2017_Data file.xlsx")
data2 <- read_excel("Lokniti_CSDS Police Study 2017_Data file non-numeric.xlsx")
data2 <- data2 %>% mutate_if(is.character,as.factor) 
#15,563 respondents, 194 variables

##Aggregate questions on bias (see attached questionnaire)
data2$strength <- revalue(data2$Q33a, c("1: Very justified"="Justified", "2: Somewhat justified"="Justified", "3: Somewhat unjustified"="Unjustified","4: Very unjustified"="Unjustified","8: Don't know"="Don't Know"))
data2$home <- revalue(data2$Q33b, c("1: Very justified"="Justified", "2: Somewhat justified"="Justified", "3: Somewhat unjustified"="Unjustified","4: Very unjustified"="Unjustified","8: Don't know"="Don't Know"))
data2$heinous <- revalue(data2$Q33c, c("1: Very justified"="Justified", "2: Somewhat justified"="Justified", "3: Somewhat unjustified"="Unjustified","4: Very unjustified"="Unjustified","8: Don't know"="Don't Know"))
data2$inflexible <- revalue(data2$Q33d, c("1: Very justified"="Justified", "2: Somewhat justified"="Justified", "3: Somewhat unjustified"="Unjustified","4: Very unjustified"="Unjustified","8: Don't know"="Don't Know"))

perception_a <- data2 %>% freq_table(Z3, strength)
perception_a <- perception_a[1:6,]#

fig1 <- ggplot(perception_a, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.5), width = 0.4) +
  geom_text(aes(label = round(percent_row,1)), vjust=-1.5, position = position_dodge(0.5))
fig1 <- fig1 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
fig1 <- fig1 + theme(plot.title = element_text(size = 13, hjust=0.5)) +  ggtitle("Police requires physical strength/forceful \n behavior which women lack.") 
fig1 <- fig1 + theme(axis.text.x = element_text(colour="grey20", size=15, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=15)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 60, 5), limits=c(0, 60)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.5), width = 0.4)
fig1 <- fig1 + scale_fill_manual(values = c("gray71", "grey40"), labels=c("Male Respondents","Female Respondents"))
fig1

####
perception_b <- data2 %>% freq_table(Z3, home)
perception_b <- perception_b[1:6,]#

fig2 <- ggplot(perception_b, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.5), width = 0.4) +
  geom_text(aes(label = round(percent_row,1)), vjust=-1.5, position = position_dodge(0.5))
fig2 <- fig2 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
fig2 <- fig2 + theme(plot.title = element_text(size = 13, hjust=0.5)) +  ggtitle("A woman should prioritize managing home \n instead of joining police.") 
fig2 <- fig2 + theme(axis.text.x = element_text(colour="grey20", size=15, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=15)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 60, 5), limits=c(0, 60)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.5), width = 0.4)
fig2 <- fig2 + scale_fill_manual(values = c("gray71", "grey40"), labels=c("Male Respondents","Female Respondents"))
fig2

###
perception_c <- data2 %>% freq_table(Z3, heinous)
perception_c <- perception_c[1:6,]#

fig3 <- ggplot(perception_c, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.5), width = 0.4) +
  geom_text(aes(label = round(percent_row,1)), vjust=-1.5, position = position_dodge(0.5))
fig3 <- fig3 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
fig3 <- fig3 + theme(plot.title = element_text(size = 13, hjust=0.5)) + ggtitle("Women police are incapable of handling \n high intensity crimes/cases.") 
fig3 <- fig3 + theme(axis.text.x = element_text(colour="grey20", size=15, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=15)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 60, 5), limits=c(0, 60)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.5), width = 0.4)
fig3 <- fig3 + scale_fill_manual(values = c("gray71", "grey40"), labels=c("Male Respondents","Female Respondents"))
fig3

###
perception_d <- data2 %>% freq_table(Z3, inflexible)
perception_d <- perception_d[1:6,]#

fig4 <- ggplot(perception_d, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.5), width = 0.4) +
  geom_text(aes(label = round(percent_row,1)), vjust=-1.5, position = position_dodge(0.5))
fig4 <- fig4 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
fig4 <- fig4 + theme(plot.title = element_text(size = 13, hjust=0.5)) + ggtitle("Because of inflexible working hours \n it is difficult for women to work in police.")
fig4 <- fig4 + theme(axis.text.x = element_text(colour="grey20", size=15, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=15)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 60, 5), limits=c(0, 60)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.5), width = 0.4)
fig4 <- fig4 + scale_fill_manual(values = c("gray71", "grey40"), labels=c("Male Respondents","Female Respondents"))
fig4

#Figure 1
fig_combined1 <- ggarrange(fig1, fig2, fig3, fig4,labels = c("1", "2","3","4"), ncol=2, nrow=2, common.legend = TRUE, legend="bottom")
fig_combined1


##We have two reasons for approaching the police. Table A8
prop.table(table(data2$Q3ba1))
prop.table(table(data2$Q3bb1))

##Create a factor where if approached police for GBV, non-GBV, not approached
#Reason1
data2$reason1 <- ifelse(grepl("02", data2$Q3ba1), "gbv", 
                        ifelse(grepl("05", data2$Q3ba1), 'gbv',
                               ifelse(grepl("09", data2$Q3ba1), 'gbv', 
                                      ifelse(grepl("99", data2$Q3ba1), 'no_experience', 'other_reason'))))
table(data2$reason1)

##Reason2
data2$reason2 <- ifelse(grepl("02", data2$Q3bb1), "gbv", 
                        ifelse(grepl("05", data2$Q3bb1), 'gbv',
                               ifelse(grepl("09", data2$Q3bb1), 'gbv', 
                                      ifelse(grepl("99", data2$Q3bb1), 'no_experience', 'other_reason'))))
table(data2$reason2)

##Now create a var that tells me if GBV in either column, or Other Reason, or NO experience
data2$contact_cops <- ifelse(grepl("gbv", data2$reason1) | grepl("gbv", data2$reason2), "vaw",
                             ifelse(grepl("no_experience", data2$reason1) & grepl("no_experience", data2$reason2), "none", 'other'))

table(data2$contact_cops)#So 682 citizens have reported this

##check
test <- select(data2, Q3ba1, Q3bb1, reason1, reason2, contact_cops)

##Biases
prop.table(table(data2$contact_cops, data2$strength),1)
prop.table(table(data2$contact_cops, data2$home),1)
prop.table(table(data2$contact_cops, data2$heinous),1)
prop.table(table(data2$contact_cops, data2$inflexible),1)

##Let's look at same as above but for female complainants
fem <- data2[with(data2, grepl("2: Female", data2$Z3)),]

##Biases
prop.table(table(fem$contact_cops, fem$strength),1)
prop.table(table(fem$contact_cops, fem$home),1)
prop.table(table(fem$contact_cops, fem$heinous),1)
prop.table(table(fem$contact_cops, fem$inflexible),1)

##Aggregate
perception_a <- data2 %>% freq_table(contact_cops, strength)

fig1 <- ggplot(perception_a, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.9), width = 0.9) +
  geom_text(aes(label = round(percent_row,1)), vjust=-1.3, position = position_dodge(0.9))
fig1 <- fig1 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
fig1 <- fig1 + theme(plot.title = element_text(size = 13, hjust=0.5)) +  ggtitle("Police requires physical strength/forceful \n behavior which women lack.") 
fig1 <- fig1 + theme(axis.text.x = element_text(colour="grey20", size=13, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=13)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 70, 5), limits=c(0, 70)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.9), width = 0.9)
fig1 <- fig1 + scale_fill_manual(values = c("gray71", "grey40", "grey20"), labels=c("No Contact","Contact:Other", "Contact:VAW"))
fig1

####
perception_b <- data2 %>% freq_table(contact_cops, home)

fig2 <- ggplot(perception_b, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.9), width = 0.9) +
  geom_text(aes(label = round(percent_row,1)), vjust=-1.5, position = position_dodge(0.9))
fig2 <- fig2 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
fig2 <- fig2 + theme(plot.title = element_text(size = 13, hjust=0.5)) +  ggtitle("A woman should prioritize managing home \n instead of joining police.") 
fig2 <- fig2 + theme(axis.text.x = element_text(colour="grey20", size=13, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=13)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 60, 5), limits=c(0, 60)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.9), width = 0.9)
fig2 <- fig2 + scale_fill_manual(values = c("gray71", "grey40", "grey20"), labels=c("No Contact","Contact:Other", "Contact:VAW"))
fig2

###
perception_c <- data2 %>% freq_table(contact_cops, heinous)

fig3 <- ggplot(perception_c, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.9), width = 0.9) +
  geom_text(aes(label = round(percent_row,1)), vjust=-1.5, position = position_dodge(0.9))
fig3 <- fig3 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
fig3 <- fig3 + theme(plot.title = element_text(size = 13, hjust=0.5)) + ggtitle("Women police are incapable of handling \n high intensity crimes/cases.") 
fig3 <- fig3 + theme(axis.text.x = element_text(colour="grey20", size=13, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=13)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 60, 5), limits=c(0, 60)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.9), width = 0.9)
fig3 <- fig3 + scale_fill_manual(values = c("gray71", "grey40", "grey20"), labels=c("No Contact","Contact:Other", "Contact:VAW"))
fig3

###
perception_d <- data2 %>% freq_table(contact_cops, inflexible)

fig4 <- ggplot(perception_d, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.9), width = 0.9) +
  geom_text(aes(label = round(percent_row,1)), vjust=-1.5, position = position_dodge(0.9))
fig4 <- fig4 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
fig4 <- fig4 + theme(plot.title = element_text(size = 13, hjust=0.5)) + ggtitle("Because of inflexible working hours \n it is difficult for women to work in police.")
fig4 <- fig4 + theme(axis.text.x = element_text(colour="grey20", size=13, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=13)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 60, 5), limits=c(0, 60)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.9), width = 0.9)
fig4 <- fig4 + scale_fill_manual(values = c("gray71", "grey40", "grey20"), labels=c("No Contact","Contact:Other", "Contact:VAW"))
fig4

#Figure A5
fig_combined1 <- ggarrange(fig1, fig2, fig3, fig4, labels = c("1", "2","3","4"), ncol=2, nrow=2, common.legend = TRUE, legend="bottom")
fig_combined1

##Female respondents only
table(data2$contact_cops)
table(fem$contact_cops)
prop.table(table(data2$contact_cops))
prop.table(table(fem$contact_cops))

perception_a <- fem %>% freq_table(contact_cops, strength)

femfig1 <- ggplot(perception_a, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.9), width = 0.9) +
  geom_text(aes(label = round(percent_row,1)), vjust=-2.2, position = position_dodge(0.9))
femfig1 <- femfig1 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
femfig1 <- femfig1 + theme(plot.title = element_text(size = 13, hjust=0.5)) +  ggtitle("Police requires physical strength/forceful \n behavior which women lack.") 
femfig1 <- femfig1 + theme(axis.text.x = element_text(colour="grey20", size=13, hjust=.5, vjust=.5),
                           axis.text.y = element_text(colour="grey20", size=13)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                         panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 75, 5), limits=c(0, 75)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.9), width = 0.9)
femfig1 <- femfig1 + scale_fill_manual(values = c("gray71", "grey40", "grey20"), labels=c("No Contact","Contact:Other", "Contact:VAW"))
femfig1

####
perception_b <- fem %>% freq_table(contact_cops, home)

femfig2 <- ggplot(perception_b, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.9), width = 0.9) +
  geom_text(aes(label = round(percent_row,1)), vjust=-2.2, position = position_dodge(0.9))
femfig2 <- femfig2 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
femfig2 <- femfig2 + theme(plot.title = element_text(size = 13, hjust=0.5)) +  ggtitle("A woman should prioritize managing home \n instead of joining police.") 
femfig2 <- femfig2 + theme(axis.text.x = element_text(colour="grey20", size=13, hjust=.5, vjust=.5),
                           axis.text.y = element_text(colour="grey20", size=13)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                         panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 60, 5), limits=c(0, 60)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.9), width = 0.9)
femfig2 <- femfig2 + scale_fill_manual(values = c("gray71", "grey40", "grey20"), labels=c("No Contact","Contact:Other", "Contact:VAW"))
femfig2

###
perception_c <- fem %>% freq_table(contact_cops, heinous)

femfig3 <- ggplot(perception_c, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.9), width = 0.9) +
  geom_text(aes(label = round(percent_row,1)), vjust=-2.3, position = position_dodge(0.9))
femfig3 <- femfig3 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
femfig3 <- femfig3 + theme(plot.title = element_text(size = 13, hjust=0.5)) + ggtitle("Women police are incapable of handling \n high intensity crimes/cases.") 
femfig3 <- femfig3 + theme(axis.text.x = element_text(colour="grey20", size=13, hjust=.5, vjust=.5),
                           axis.text.y = element_text(colour="grey20", size=13)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                         panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 60, 5), limits=c(0, 60)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.9), width = 0.9)
femfig3 <- femfig3 + scale_fill_manual(values = c("gray71", "grey40", "grey20"), labels=c("No Contact","Contact:Other", "Contact:VAW"))
femfig3

###
perception_d <- fem %>% freq_table(contact_cops, inflexible)

femfig4 <- ggplot(perception_d, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.9), width = 0.9) +
  geom_text(aes(label = round(percent_row,1)), vjust=-2.3, position = position_dodge(0.9))
femfig4 <- femfig4 + theme(axis.ticks=element_blank(), axis.text.y = element_blank(), axis.title.x=element_blank(), legend.title=element_blank(), axis.title.y=element_blank()) + theme(text = element_text(size = 20)) 
femfig4 <- femfig4 + theme(plot.title = element_text(size = 13, hjust=0.5)) + ggtitle("Because of inflexible working hours \n it is difficult for women to work in police.")
femfig4 <- femfig4 + theme(axis.text.x = element_text(colour="grey20", size=13, hjust=.5, vjust=.5),
                           axis.text.y = element_text(colour="grey20", size=13)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                         panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 60, 5), limits=c(0, 60)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.9), width = 0.9)
femfig4 <- femfig4 + scale_fill_manual(values = c("gray71", "grey40", "grey20"), labels=c("No Contact","Contact:Other", "Contact:VAW"))
femfig4

#Figure 2
femfig_combined1 <- ggarrange(femfig1, femfig2, femfig3, femfig4, labels = c("1", "2","3","4"), ncol=2, nrow=2, common.legend = TRUE, legend="bottom")
femfig_combined1


##Table where its Q3ba1 and Q3ba2, for all respondents and female
reason_a <- data2 %>% dplyr::group_by(Q3ba1) %>% dplyr::summarize(count = n()) %>% mutate(prop = count / sum(count)) 
reason_a <- arrange(reason_a, desc(prop))
reason_a

reason_b <- data2 %>% dplyr::group_by(Q3bb1) %>% dplyr::summarize(count = n()) %>% mutate(prop = count / sum(count)) 
reason_b <- arrange(reason_b, desc(prop))
reason_b

##reason_a or reason_b for women
reason_aw <- fem %>% dplyr::group_by(Q3ba1) %>% dplyr::summarize(count = n()) %>% mutate(prop = count / sum(count)) 
reason_aw <- arrange(reason_aw, desc(prop))
reason_aw

reason_bw <- fem %>% dplyr::group_by(Q3bb1) %>% dplyr::summarize(count = n()) %>% mutate(prop = count / sum(count)) 
reason_bw <- arrange(reason_bw, desc(prop))
reason_bw

##join
names(reason_a)[1] <- "var"
names(reason_a)[2] <- "count1"
names(reason_a)[3] <- "prop1"

names(reason_b)[1] <- "var"
names(reason_b)[2] <- "count2"
names(reason_b)[3] <- "prop2"

look1 <- join(reason_a, reason_b, type='left')#

##Now women
names(reason_aw)[1] <- "var"
names(reason_aw)[2] <- "count3"
names(reason_aw)[3] <- "prop3"

names(reason_bw)[1] <- "var"
names(reason_bw)[2] <- "count4"
names(reason_bw)[3] <- "prop4"

look2 <- join(reason_aw, reason_bw, type='left')

##join 
final <- join(look1, look2, type='left')#Table A5
print(xtable(final, digits = 3), include.rownames=FALSE)

#####How many justified
##Combine justified statements (1 if it was justified)
data2$new_strength <- ifelse(data2$Q33a == "1: Very justified", 1, 
                             ifelse(data2$Q33a == "2: Somewhat justified", 1, 
                                    ifelse(data2$Q33a == "3: Somewhat unjustified", 0, 
                                           ifelse(data2$Q33a == "4: Very unjustified", 0, 
                                                  0))))

data2$new_home <- ifelse(data2$Q33b == "1: Very justified", 1, 
                         ifelse(data2$Q33b == "2: Somewhat justified", 1, 
                                ifelse(data2$Q33b == "3: Somewhat unjustified", 0, 
                                       ifelse(data2$Q33b == "4: Very unjustified", 0, 
                                              0))))

data2$new_heinous <- ifelse(data2$Q33c == "1: Very justified", 1, 
                            ifelse(data2$Q33c == "2: Somewhat justified", 1, 
                                   ifelse(data2$Q33c == "3: Somewhat unjustified", 0, 
                                          ifelse(data2$Q33c == "4: Very unjustified", 0, 
                                                 0))))

data2$new_inflexible <- ifelse(data2$Q33d == "1: Very justified", 1, 
                               ifelse(data2$Q33d == "2: Somewhat justified", 1, 
                                      ifelse(data2$Q33d == "3: Somewhat unjustified", 0, 
                                             ifelse(data2$Q33d == "4: Very unjustified", 0, 
                                                    0))))

#justified statements
data2 <- mutate(data2, justified = rowSums(cbind(new_strength,new_home,new_heinous,new_inflexible), na.rm = T))
table(data2$justified)
summary(data2$justified)

###for male/female respondents
data2$justified <- as.factor(data2$justified)

perception_e <- data2 %>% freq_table(Z3, justified)
perception_e <- perception_e[1:10,]#

#Figure A2
fig5 <- ggplot(perception_e, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.5), width = 0.4) +
  geom_text(aes(label = round(percent_row,1)), vjust=-2, size=3, position = position_dodge(0.5)) + ylab("Percent") + xlab("No. of Statements Justified") 
fig5 <- fig5 + theme(plot.title = element_text(size = 12, hjust=0.5)) + ggtitle("")
fig5 <- fig5 + theme(axis.text.x = element_text(colour="grey20", size=12, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=12)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 40, 5), limits=c(0, 40)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.5), width = 0.4)
fig5 <- fig5 + scale_fill_manual(values = c("gray71", "grey40"), labels=c("Male","Female")) 
fig5 <- fig5 + theme(text = element_text(size = 12)) + theme(legend.title = element_blank()) + theme(legend.position="bottom")
fig5


###barplot for male/female respondents
data2$justified <- as.factor(data2$justified)

perception_e <- data2 %>% freq_table(contact_cops, justified)


fig6 <- ggplot(perception_e, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.5), width = 0.4) +
  geom_text(aes(label = round(percent_row,1)), vjust=-7, size=3, position = position_dodge(0.5)) + ylab("Percent") + xlab("No. of Statements Justified") 
fig6 <- fig6 + theme(plot.title = element_text(size = 12, hjust=0.5)) + ggtitle("")
fig6 <- fig6 + theme(axis.text.x = element_text(colour="grey20", size=12, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=12)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 45, 5), limits=c(0, 45)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.5), width = 0.4)
fig6 <- fig6 + scale_fill_manual(values = c("gray71", "grey40", "grey20"), labels=c("No Contact","Contact:Other", "Contact:VAW"))
fig6 <- fig6 + theme(text = element_text(size = 12)) + theme(legend.title = element_blank()) + theme(legend.position="bottom")
fig6


##Female respondents
fem <- data2[with(data2, grepl("2: Female", data2$Z3)),]

perception_e <- fem %>% freq_table(contact_cops, justified)

fig7 <- ggplot(perception_e, aes(col_cat, percent_row, group=row_cat, ymin=lcl_row, ymax=ucl_row)) + 
  geom_col(aes(fill = row_cat),position = position_dodge(0.5), width = 0.4) +
  geom_text(aes(label = round(percent_row,1)), vjust=-7, size=3,position = position_dodge(0.5)) + ylab("Percent") + xlab("No. of Statements Justified") 
fig7 <- fig7 + theme(plot.title = element_text(size = 12, hjust=0.5)) + ggtitle("")
fig7 <- fig7 + theme(axis.text.x = element_text(colour="grey20", size=12, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=12)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + scale_y_continuous(breaks = seq(0, 45, 5), limits=c(0, 45)) + theme(text = element_text(size = 20)) + geom_errorbar(aes(ymin = lcl_row, ymax = ucl_row),position = position_dodge(0.5), width = 0.4)
fig7 <- fig7 + scale_fill_manual(values = c("gray71", "grey40", "grey20"), labels=c("No Contact","Contact:Other", "Contact:VAW"))
fig7 <- fig7 + theme(text = element_text(size = 12)) + theme(legend.title = element_blank()) + theme(legend.position="bottom")
fig7


##Figure A6
fcombined <- ggarrange(fig6, fig7, labels = c("1", "2"), ncol=2, nrow=1)
fcombined


####difference between men and women BY STATE and avg. responses
#mean score across state for all and for women
data2$justified <- as.numeric(data2$justified)
aggregate(data2$justified, list(data2$State_id), FUN=mean) 

data2$State_id <- gsub("*..:", "", data2$State_id)#


fig1 <- ggplot(data2, aes(x = reorder(State_id, justified), y = justified)) +      
  stat_summary(geom = "bar", fun = mean, position = "dodge") +
  stat_summary(geom = "errorbar", fun.data = mean_se, position = "dodge") +
  stat_summary(aes(label=round(..y..,2)), fun=mean, geom="text", size=6,hjust = -0.5)
fig1 <- fig1 + scale_fill_grey() + theme(legend.title=element_blank(),axis.title.y=element_blank(),axis.title.x=element_blank())+ ggtitle("All Respondents")
fig1 <- fig1 + coord_flip(ylim = c(0, 4.5))
fig1 <- fig1 + theme(axis.text.x = element_text(colour="grey20", size=13, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=13)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + theme(text = element_text(size = 13)) 
fig1

#Female respondenets
fem <- data2[with(data2, grepl("2: Female", data2$Z3)),]
fig2 <- ggplot(fem, aes(x = reorder(State_id, justified), y = justified)) +      
  stat_summary(geom = "bar", fun = mean, position = "dodge") +
  stat_summary(geom = "errorbar", fun.data = mean_se, position = "dodge") +
  stat_summary(aes(label=round(..y..,2)), fun.y=mean, geom="text", size=6,hjust = -0.5)
fig2 <- fig2 + coord_cartesian(xlim = c(0, 5))
fig2 <- fig2 + scale_fill_grey() + theme(legend.title=element_blank(),axis.title.y=element_blank(),axis.title.x=element_blank())+ ggtitle("Female Respondents")
fig2 <- fig2 + coord_flip(ylim = c(0, 4.5))
fig2 <- fig2 + theme(axis.text.x = element_text(colour="grey20", size=13, hjust=.5, vjust=.5),
                     axis.text.y = element_text(colour="grey20", size=13)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                                                                                   panel.background = element_blank(), axis.line = element_line(colour = "black")) + theme(text = element_text(size = 13)) 
fig2

#Figure A3
ggarrange(fig1, fig2)

#
#Difference between men and women across states 
data2$female <- ifelse(grepl("2: Female", data2$Z3), 1, 0) 

#
stor1 <- data2 %>% group_by(State_id) %>% group_map(~ t.test(justified ~ female, .x))

store <- data2 %>% group_by(State_id) %>% do(broom::tidy(t.test(justified ~ female, data=.)))

#compare
bihar <- data2[with(data2, grepl("Bihar", data2$State_id)),]
male <- bihar[with(bihar, grepl("1: Male", bihar$Z3)),]
fem <- karnataka[with(bihar, grepl("2: Female", bihar$Z3)),]
t.test(male$justified, fem$justified)

#Let's plot this:
plot1 <- ggplot(store,aes(x= reorder(State_id, estimate),y=estimate,ymin=conf.low, ymax=conf.high))+
  geom_pointrange() + coord_flip() + ggtitle("Difference Between Male and Female Respondents By State") +
  xlab("") + ylab("") + geom_hline(yintercept = 0, linetype='dashed', color='red', size=1) +
  theme(axis.text.x = element_text(colour="grey20", size=14, hjust=.5, vjust=.5),
        axis.text.y = element_text(colour="grey20", size=12),
        text=element_text(size=16, family="Arial")) + theme(plot.title = element_text(size = 15, hjust=0.5))
plot1 <- plot1 + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                       panel.background = element_blank(), axis.line = element_line(colour = "black")) + ylim(-0.55, 0.75)

plot1 <- plot1 + annotate("rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0, alpha = .15)
plot1##Figure A4


#######################
##Create vars
test3 <- select(data2, new_strength, new_heinous, new_home, new_inflexible)
psych::alpha(test3)

data2 <- mutate(data2, index4 = rowSums(cbind(new_strength,new_heinous,new_home, new_inflexible), na.rm = T))
table(data2$index4)
summary(data2$index4)

##########################################
data3 <- select(data2, State_id, Z3, Q34a, Q33a, Q33b, Q33c, Q33d, Z2, Z4, Q3, Z5, Z5b, Z7, Z8a, Z9, Z13, 
                Z19, E1, F7, Q11a, Q11b, Q11c, Z7a, Q3ba1, Q3bb1, Q22c, Q32d, Z5, Q23e, Z5a, Q29b, Q3, Q3a, Q3ba1, 
                Q3ba2, Q3bb2, Q29, Q29a, Q25b, Q41, Q6a, Q8a, index4, Q6, Q8, Q9, Q12a, Q12b, Q19, contact_cops)

#
data3$female <- ifelse(grepl("2: Female", data3$Z3), 1, 0) 
data3$male <- ifelse(grepl("1: Male", data3$Z3), 1, 0) 

#Z2 age: convert to numeric
table(data3$Z2)#need to change 95 and 98 into number and NA
data3$age <- data3$Z2 #duplicate
data3$age <- revalue(data3$age, c("95: 95 years and above"="95", "98: No response"=NA))
data3$age <- as.numeric(data3$age)

#Z4 education (change levels of factor)
##0/1= below primary/non-literate, 3/4 (less than HS), 4 (HS), 5 (some college), 6/7/8 grad
data3$education <- data3$Z4
data3$education <- revalue(data3$education, c("0: Non Literate"=1, "1: Below Primary"=2, "2: Primary pass/ Middle fail"
                                              =3, "3: Middle pass/Matric Fail"=4,"4: Matric"=5,
                                              "5: Intermediate/ College no degree"=6,"6: Graduate or equivalent"=7,
                                              "7: Post Graduate"=8,"8: Professional Degrees and Higher Research"=9,"9: N.A."=NA))
data3$education <- as.integer(data3$education)
table(data3$education)

#Z7 marital status (as.factor) - ever married (married, divorced, deserted, separated) (1), married (guana, and unmarried/single/live with partner)
data3$marital <- data3$Z7
data3$marital <- revalue(data3$marital, c("1: Married"="ever married", "3: Widowed"="ever married","4: Divorced"="ever married","5: Separated"="ever married",
                                          "6: Deserted"="ever married","2: Married (Gauna not performed, not started living together)"="unmarried","7: Unmarried/Single"="unmarried",
                                          "8: Live with partner but not married"="unmarried","9: No response"=NA))

data3$marital2 <- ifelse(grepl("unmarried", data3$marital), 1, 0)#unmarried
data3$marital3 <- ifelse(grepl("unmarried", data3$marital), 0, 1)#Ever married [include with housewife]

#Z8a caste category (as.factor)
data3$caste <- data3$Z8a
data3$caste <- revalue(data3$caste, c("4: Other"="Others", "9: N.A."="Others"))
table(data3$caste)

#Z9 religion 
data3$religion <- data3$Z9
data3$religion <- revalue(data3$religion, c("5: Buddhist/Neo Buddhist"="Others", "6: Jain"="Others","7: Parsi"="Others","8: No religion"="Others","9: Others"="Others"))

#Z13 locality
data3$urban <- ifelse(grepl("Village|Town", data3$Z13), 0, 1)#urban if NOT village or town

#Z19 monthly household income numeric
#E1 observer effects (as.factor)
table(data3$E1)
data3$observer <- ifelse(grepl("1: No one", data3$E1), "private", "not private") 

#F7 interviewer effects
data3$F7 <- as.factor(data3$F7)

#Q3 experience with police
data3$experience<- ifelse(data3$Q3 == "2: Yes", 1, 0)

##
male <- data3[with(data3, grepl("1: Male", data3$Z3)),]
fem <- data3[with(data3, grepl("2: Female", data3$Z3)),]

##Descriptive Statistics Table
library(papeR)

##
library(fastDummies)
data3 <- dummy_cols(data3, select_columns = c("caste", "religion", "urban","observer"))


##Rename caste/religion variables in data3
names(data3)
names(data3)[names(data3)=="caste_1: Scheduled Caste (SC)"] <- "sc"
names(data3)[names(data3)=="caste_2: Scheduled Tribe (ST)"] <- "st"
names(data3)[names(data3)=="caste_3: Other Backward Classes (OBC)"] <- "obc"
names(data3)[names(data3)=="religion_2: Muslim"] <- "muslim"

names(data3)[names(data3)=="religion_3: Christian"] <- "christian"
names(data3)[names(data3)=="religion_4: Sikh"] <- "sikh"
names(data3)[names(data3)=="religion_Others"] <- "Other"

data4 <- select(data3, index4, female, age, education, marital3, sc, st, obc, muslim, christian, sikh, urban_1, experience, observer_private, contact_cops, experience)

prop.table(table(data4$experience, useNA = "always"))
prop.table(table(data4$contact_cops,useNA = "always"))

## dataframe 
data4 <- as.data.frame(data4)

#convert female to factor
data4$female <- as.factor(data4$female)

#Summary grouped by male/female respondents
test <- xtable(papeR::summarize(data4, type = "numeric", group = "female"))

test <- test[,c(1,2,4,7,8)]
print(xtable(test), include.rownames=FALSE)

#Summary without grouping
test2 <- xtable(papeR::summarize(data4))
test2 <- test2[,c(1,2,5,6)]
print(xtable(test2), include.rownames=FALSE)

##join 
##Table A4
names(test)[1] <- "variable"
names(test2)[1] <- "variable"
look <- join(test, test2, type='left', by='variable')
print(xtable(look, digits = 2), include.rownames=FALSE)


##Same vars as function of interaction with police
data4$contact_cops <- as.factor(data4$contact_cops)
table(data4$contact_cops)

#Table A6
test <- xtable(papeR::summarize(data4, type = "numeric", group = "contact_cops", test=FALSE))
test <- test[,c(1,2,4,7,8)]
print(xtable(test), include.rownames=FALSE)

##Let's look at women complainants only
#Table A7
fem <- data4[with(data4, grepl("1", data4$female)),]

test <- xtable(papeR::summarize(fem, type = "numeric", group = "contact_cops", test=FALSE))
test <- test[,c(1,2,4,7,8)]
print(xtable(test), include.rownames=FALSE)


